Area Overview.
Satellite View.
From the describe function (output hidden) we see that:
there are no missing values
there do not appear to be any duplicates
the data is evenly distributed between each Cover Type: \(2,160\) rows each
the distribution is skewed across Wilderness Area:
1 - Rawah Wilderness Area: 3597
2 - Neota Wilderness Area: 499
3 - Comanche Peak Wilderness Area: 6349
4 - Cache la Poudre Wilderness Area: 4675
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Cover, y=Elevation)) +
geom_boxplot(aes(fill=Cover, group=Cover)) +
ggtitle('Fig 1: Elevation')
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Cover, y=Slope)) +
geom_boxplot(aes(fill=Cover, group=Cover)) +
ggtitle('Fig 2: Slope')
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Cover, y=Aspect)) +
geom_boxplot(aes(fill=Cover, group=Cover)) +
#geom_jitter(alpha=0.2, width=0.1, height=0.0) +
ggtitle('Fig 3: Aspect seems like a poor predictor')
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Cover, y=Horizontal_Distance_To_Hydrology)) +
geom_boxplot(aes(fill=Cover, group=Cover)) +
ggtitle('Fig 4: Horiz Dist to Hydro')
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Cover, y=Vertical_Distance_To_Hydrology)) +
geom_boxplot(aes(fill=Cover, group=Cover)) +
ggtitle('Fig 5: Vertical Distance to Hydro seems
like a poor predictor')
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Cover, y=Horizontal_Distance_To_Fire_Points)) +
geom_boxplot(aes(fill=Cover, group=Cover)) +
ggtitle('Fig x: Horiz Distance to Fire')
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Slope, y=Elevation)) +
geom_point(alpha=.1)+
facet_grid(.~forest$Cover) +
ggtitle('Fig 6: Elevation vs Slope per Cover category')
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Cover, y=Hillshade_Noon)) +
geom_boxplot(alpha=.1)+
#facet_grid(.~forest$Cover) +
ggtitle('Fig 7: Aspect vs Slope per Cover category')
forest <- forest %>%
mutate(all_wild = Wilderness_Area1+Wilderness_Area2+Wilderness_Area3+Wilderness_Area4)
summary(forest$all_wild)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
Based on this we can see that each tree is assigned to one and only one Wilderness_Area.
Converting Wilderness_Area to a single column:
factors = c(12:15)
wilderness_area = rep(0, length(forest))
for(f in factors){
colvalues=forest[,f]
wilderness_area[which(colvalues==1)] = f-11
}
forest$all_wilderness_area <- wilderness_area
summary(forest$all_wilderness_area)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 2.0 3.0 2.8 4.0 4.0
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Cover, y=all_wilderness_area)) +
geom_jitter(alpha=0.1) +
ggtitle('Fig x: Wilderness Area seems to be a predictor of Cover')+
ylab('Wilderness Area')
factors = c(16:55)
soil_type = rep(0, length(forest))
for(f in factors){
colvalues=forest[,f]
soil_type[which(colvalues==1)]=soil_type[which(colvalues==1)]+1
}
forest$all_soil_type <- soil_type
max(forest$all_soil_type)
## [1] NA
summary(soil_type)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1 1 1 1 1 1 15061
Based on this we can see that each tree is assigned to one and only one Soil_Type.
Converting Soil_Type to a factor:
factors = c(16:55)
soil_type = rep(0, length(forest))
for(f in factors){
colvalues=forest[,f]
soil_type[which(colvalues==1)] = f-15
}
forest$all_soil_type <- soil_type
summary(forest$all_soil_type)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 10.00 17.00 19.17 30.00 40.00
forest %>%
mutate(Cover = as.factor(Cover_Type)) %>%
ggplot(aes(x=Cover, y=all_soil_type)) +
geom_jitter(alpha=.1)+
ggtitle('Fig x: Soil Type seems to be a predictor of Cover')
forest %>%
ggplot(aes(x=all_soil_type, y=all_wilderness_area)) +
geom_jitter(alpha=.1) +
facet_grid(.~forest$Cover) +
ggtitle('Fig x: Soil Type seems to be a predictor of Cover')
library(nnet)
forest_tmp <- forest %>%
mutate(wildarea = as.factor(all_wilderness_area)) %>%
mutate(soiltype = as.factor(all_soil_type))
mod <- multinom(data=forest_tmp, Cover~Elevation + wildarea)
## # weights: 42 (30 variable)
## initial value 29422.161454
## iter 10 value 22489.965485
## iter 20 value 18155.112778
## iter 30 value 15278.359338
## iter 40 value 14201.820304
## iter 50 value 14181.132533
## iter 60 value 14177.416535
## final value 14177.353532
## converged
summary(mod)
## Call:
## multinom(formula = Cover ~ Elevation + wildarea, data = forest_tmp)
##
## Coefficients:
## (Intercept) Elevation wildarea2 wildarea3 wildarea4
## 2 23.65555 -0.007838952 0.6095364 0.0932434 4.8938514
## 3 71.94577 -0.031387408 10.4127513 16.1089596 17.5496549
## 4 83.82780 -0.035079338 13.1048834 2.2783062 14.4994439
## 5 43.15569 -0.014817494 -46.3362704 0.8737884 -2.7748701
## 6 67.17515 -0.030546283 20.5220075 18.8211409 20.3142486
## 7 -56.17451 0.017173359 -1.1246601 0.4832609 0.8281581
##
## Std. Errors:
## (Intercept) Elevation wildarea2 wildarea3 wildarea4
## 2 1.605222e-04 1.286866e-05 1.803751e-03 3.731080e-02 4.122955e-04
## 3 1.815021e-03 2.344580e-05 3.387814e-10 1.424904e-02 1.606351e-02
## 4 4.498677e-05 2.594585e-05 1.044617e-08 1.057492e-08 4.496550e-05
## 5 1.289519e-04 1.599588e-05 1.348750e-24 2.761592e-02 3.634967e-06
## 6 1.782303e-03 2.299793e-05 8.589032e-07 1.426924e-02 1.605050e-02
## 7 1.679216e-05 1.320445e-05 6.011271e-04 2.191129e-03 6.147223e-13
##
## Residual Deviance: 28354.71
## AIC: 28414.71